##Multivariate resemblance## #libraries sources needed and stuff library(vegan) library(permute) library(simba) library(cluster) library(ecodist) #load go data methtis<-read.csv('100tiss.csv',header=TRUE,row.names=1) methtis.log<-log(methtis+1) hist.plots(methtis.log) methtis.jac<-sim(methtis,method="euclid") methtis.jac methtis.pcoa<-cmdscale(methtis.jac,k=35,eig=TRUE,add=TRUE) methtis.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs methtis.pcoa$eig[1:5]/sum(methtis.pcoa$eig)*100 #look for signifiance of each PC by compare eigenvalues to expectations according to broken stick model plot(methgo.pcoa$eig/sum(methgo.pcoa$eig)*100-bstick(45)*100,xlab="PC", ylab="Actual-random percent variation explained") abline(h=0) #look at ordination plot of PC and PC2 ordiplot(methtis.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1 (1%)", ylab="PC2 (1%)") #Now calculate the PCA loadings (i.e. variable weights) #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.methtis<-envfit(methtis.pcoa$points, methtis.log,perm=1000) #look at values vec.methtis #now plot the eigenvectors on the ordi plot ordiplot(methtis.pcoa, choices=c(1,2), type="text", display="sites", xlab='PC1 (60%)', ylab="PC2 (10%)") #same plot just w/0 labels ordiplot(methtis.pcoa, choices=c(1,2), xlab='PC1 (60%)', ylab="PC2 (10%)") plot(vec.methtis, p.max=0.9, col="blue") #perform NMBS on log transformed data distrance is distance coefficient to use (default is bray-curtis), k is the number of ordnation axes to generate, autotransform default is TRUE, will transform the data first (usually square root), trymax is the number of random starts to try, default is 20 methtis.nmds<-metaMDS(methtis.log,distance='bray', k=2, autotransform=FALSE, trymax=100) #look at it methtis.nmds #get a list of objects resulting from the analysis names(meththis.nmds) #evaluate the number of dimensions with a scree plot nmds.scree(methtis.log, distance='bray', k=10, autotransform=FALSE, trymax=20) #or try the randomization thing nmds.monte(methtis.log, distance='bray', k=3, autotransform=FALSE, trymax=20) #how good a job does NMDS do? check by looking at corrlation between the caclulated dissimilarities and the plotted values. Specifically, plot original dissimilarities and euclidean distances in the ordination using stressplot() stressplot(methtis.nmds) #look at a plot of the NMDS plot(methtis.nmds, type='n') text(methtis.nmds, labels=row.names(meth)) #if we'd like to see how a particular descriptor (in this case, a weird fish abundance) changes with location we can make te symbol size proportional to log abundance plot(meth.nmds, type='n') points(meth.nmds,cex=meth.log$X.CG.in.MBD.exon) #if we want to look at the sample scores, which are the coordinates of the samples in k-dimensional ordination, they are stored in the object names points meth.nmds$points #to calculate species loadings (i.e. variable weights) on each derived axis, use envfit with the NMDS scores vec.meth<-envfit(meth.nmds$points, meth.log, perm=1000) vec.meth #NOW these loadings can be plotted ordiplot(meth.nmds, choices=c(1,2), xlab='Axis 1', ylab='Axis 2',) plot(vec.meth, p.max=0.01, col='blue') library(RColorBrewer) library(gplots) #ok, it worked with this template, so now I'm going to try pulling in all the genes. log transforming the data. saving the file as a txt file. then changing to a dat go.exp<-read.csv("methgo.csv",header=TRUE,row.names=1) #transpose gologt<-t(go.exp) mat=data.matrix(go.exp) pdf("gooys2.pdf", height=10, width=10) heatmap.2(mat, Rowv=TRUE, Colv=TRUE, # dendrogram= c("none"), distfun = dist, hclustfun = hclust, xlab = "X data", ylab = "Y data", key=TRUE, keysize=1, trace="none", density.info=c("none"), margins=c(10, 8), col=brewer.pal(10,"PiYG") # col=redgreen(75), ) dev.off()